# coding=utf-8
import requests
from bs4 import BeautifulSoup
import time

# Request headers (User-Agent shared by all page fetches)
from teacher.utitl import MysqlHelper



class CJ():
    """Scraper for Ganji (赶集网) rental listings.

    ``shoyye`` walks a listing-index page and, for each listing link,
    ``xq`` fetches the detail page, extracts the fields and inserts a
    new row into the ``gj`` MySQL table (skipping duplicates).
    """

    # Browser-like headers so the site serves the normal desktop markup.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/49.0.2623.112 Safari/537.36'}

    def xq(self, xqurl):
        """Scrape one listing detail page and store it if not seen before.

        :param xqurl: absolute URL of a rental-listing detail page.
        """
        print(xqurl)
        # timeout added: without it a stalled server hangs the crawl forever.
        html = requests.get(xqurl, headers=self.HEADERS, timeout=10)
        htmltxt = BeautifulSoup(html.text, "html.parser")

        # Breadcrumb anchors carry the three region levels; strip the
        # "合租房"/"租房" (shared-rental / rental) suffixes from the names.
        listdq = htmltxt.find('div', attrs={'class': 'f-crumbs f-w1190'}).find_all('a')
        onej = listdq[2].text.replace("合租房", "").replace("租房", "")      # level-1 region
        twoj = listdq[3].text.replace("合租房", "").replace("租房", "")      # level-2 region
        therej = listdq[4].text.replace("合租房", "").replace("租房", "")    # level-3 region

        name = htmltxt.find('p', attrs={'class': 'card-title'}).text.replace("\n", "")
        hm = htmltxt.find_all("li", attrs={'class': 'item f-fl'})

        hx = hm[0].find('span', attrs={"class": "content"}).text  # room layout, e.g. "3室1厅"
        mj = hm[1].find('span', attrs={"class": "content"}).text  # rental mode + area text

        # Rental mode: whole flat (整租) / shared flat (合租) / single room (单间).
        if '整租' in mj:
            zflx = '整租'
        elif '合租' in mj:
            zflx = '合租'
        elif '单间' in mj:
            zflx = '单间'
        else:
            zflx = ''

        # Image src on the page is protocol-relative ("//..."), so prefix a scheme.
        imgurl = 'http:' + htmltxt.find("div", attrs={'class': 'big-img-wrap'}).find('img').get('src')
        price = htmltxt.find("span", attrs={"class": 'price'}).text
        # Area text looks like "<mode>\xa0\xa0<n>㎡"; keep only the number.
        mj1 = mj.split('\xa0\xa0')[1].replace("㎡", "")

        hm1 = htmltxt.find_all("li", attrs={'class': 'er-item f-fl'})
        # The community name is a link on some pages and plain text on others.
        if hm1[0].find('a') is not None:
            xqname = hm1[0].find('a').text.replace("\n", "")
        else:
            xqname = hm1[0].find('span', attrs={"class": 'content'}).text.replace("\n", "")

        mysqlhe = MysqlHelper()
        # NOTE(security): scraped page text is interpolated straight into SQL,
        # which is injection-prone; if MysqlHelper supports placeholders,
        # switch these queries to parameterized form.
        cxsql = 'select * from gj where xqname="{0}" and name="{1}" and mj1="{2}" '.format(xqname, name, mj1)
        print(cxsql)
        cxlist = mysqlhe.get_all(cxsql)

        # Deduplicate on (xqname, name, mj1): insert only unseen listings.
        if len(cxlist) == 0:
            sql = "INSERT INTO gj (onej,twoj,therej,xqname,name,hx,mj1,price,zflx,imgurl)VALUES('{0}','{1}','{2}','{3}','{4}','{5}','{6}',{7},'{8}','{9}')".format(
                onej, twoj, therej, xqname, name, hx, mj1, int(price.strip()), zflx, imgurl)
            print(sql)
            mysqlhe.insert(sql)
        else:
            print("已存在")

    def shoyye(self, url):
        """Scrape a listing-index page, visiting at most the first 10 listings.

        :param url: URL of an index page, e.g. http://fz.ganji.com/chuzu/pn3/
        """
        html = requests.get(url, headers=self.HEADERS, timeout=10)
        htmltxt = BeautifulSoup(html.text, "html.parser")
        count = 0
        for item in htmltxt.find_all("div", attrs={'class': 'f-list-item ershoufang-list'}):
            count += 1
            href = item.find('a').get('href')
            # BUG FIX: the original tested `href.find('https://')`, which is
            # truthy (-1) when the substring is absent and falsy (0) when the
            # URL already starts with https:// — so https links were skipped
            # and "https:" was blindly prepended to everything else. Prepend
            # a scheme only to protocol-relative links.
            if href.startswith('//'):
                href = 'https:' + href
            self.xq(href)
            if count == 10:
                break
if __name__ == '__main__':
    # Script entry point: crawl page 3 of the Fuzhou rental index.
    start_url = 'http://fz.ganji.com/chuzu/pn3/'
    crawler = CJ()
    crawler.shoyye(start_url)



